Loading the data

In [1]:
%matplotlib inline
import sys
import re
import numpy

DO_CORRELATIONS, DO_PROJECTIONS, DO_REGRESSIONS, DO_CLUSTERS = True, True, True, True

if DO_PROJECTIONS:
    # from sklearn import manifold
    from sklearn import decomposition
if DO_REGRESSIONS:
    import statsmodels.api as sm
if DO_CLUSTERS:
    from sklearn import cluster
    from sklearn import metrics
    import scipy.cluster
    from scipy.sparse.linalg import eigsh

import matplotlib.pyplot as plt

import pdb

from tools_all import *

NBCS = [3,5,7]
NBC = max(NBCS)

cmD = make_colormap([d[1] for d in TOL_COLORS if d[0] <= NBC], extrapolate=False)
cmC = make_colormap([d[1] for d in TOL_COLORS], extrapolate=True)

FSIZE = 3

FIGS_REP = "figs/"
FIGS_EXT = ".png"

# FILE_AGG="data_focus_agg.csv"
# FILE_BIO="data_focus_bio.csv"
FILE_AGG="IUCN_EU_nbspc3+_focus_agg.csv"
FILE_BIO="IUCN_EU_nbspc3+_focus_bio.csv"
FILE_CLU="data_clusters.csv"

groups_bio = [['bioX1:NPP', 'bio15:PSeason', 'bio12:PTotY'],
              ['bio13:PWetM', 'bio16:PWetQ', 'bio14:PDryM', 'bio17:PDryQ', 'bio19:PColdQ', 'bio18:PWarmQ'],
              ['bio4:TSeason', 'bio7:TRngY', 'bio2:TMeanRngD', 'bio3:TIso', 'bio1:TMeanY'],
              ['bio5:TMaxWarmM', 'bio10:TMeanWarmQuarter', 'bio6:TMinColdM', 'bio11:TMeanColdQ', 'bio8:TMeanWetQ', 'bio9:TMeanDryQ']
              ]
groups_agg = [['MEAN_HYP', 'MEAN_LOP', 'MEAN_OO', 'MEAN_AL', 'MEAN_OL', 'MEAN_SF'],
              ['MEAN_HOD', 'MEAN_LOPT', 'MEAN_OT', 'MEAN_CM', 'MEAN_ETH']              
              ]

ex_agg = ['MEAN_HOD', 'MEAN_LOPT'] #, 'MEAN_OT', 'MEAN_CM']
# groups_bio = [['bioX1:NPP', 'bio15:PSeason', 'bio12:PTotY']]
# groups_agg = [['MEAN_HYP', 'MEAN_LOP', 'MEAN_OO', 'MEAN_AL', 'MEAN_OL', 'MEAN_SF']]

ABC = load_ABC(FILE_AGG, FILE_BIO, FILE_CLU)
A, head_agg, map_agg = ABC[0]["data"], ABC[0]["head"], ABC[0]["map"]
B, head_bio, map_bio = ABC[1]["data"], ABC[1]["head"], ABC[1]["map"]
C, head_clu, map_clu = ABC[2]["data"], ABC[2]["head"], ABC[2]["map"]

#### Log precipitations
B_log = numpy.vstack([numpy.log10(B[:,i]+1) if re.match("bio[0-9]+:P", hb) else B[:,i] for i, hb in enumerate(head_bio)]).T
# org_map_vnames = dict(map_vnames)
# for i, hb in enumerate(head_bio):
#     if re.match("bio[0-9]+:P", hb):
#         map_vnames[hb] = "\\log(%s)" % org_map_vnames[hb]

#### filter variables
keep_agg = [vi for vi, v in enumerate(head_agg) if re.match("MEAN_", v)]
keep_aggM = [vi for vi, v in enumerate(head_agg) if re.match("MEAN_", v) and v != "MEAN_HOD" and v != "MEAN_LOPT"]
keep_bio = [vi for vi, v in enumerate(head_bio) if re.match("bio[0-9]+:", v)]

Akw = withen(A[:, keep_agg])
AMkw = withen(A[:, keep_aggM])
Bkw = withen(B[:, keep_bio])
BLkw = withen(B_log[:, keep_bio])
hA = [head_agg[i] for i in keep_agg]
hAM = [head_agg[i] for i in keep_aggM]
hB = [head_bio[i] for i in keep_bio]

# clu_data = BLkw
# clu_data = AMkw #Akw
clu_data = numpy.hstack([BLkw, AMkw])

lng_lat = B[:, [map_bio["longitude"], map_bio["latitude"]]]
/usr/lib/python3/dist-packages/statsmodels/compat/pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
  from pandas.core import datetools
In [2]:
def mk_plot(oname):
    # plt.savefig(FIGS_REP+oname+FIGS_EXT)
    plt.show()
    # pass
def mk_out(fname):
    # return open(FIGS_REP+fname, "w")
    return sys.stdout
def close_out(fo):
    # fo.close()
    pass

Correlations

Dental traits

In [3]:
if DO_CORRELATIONS:
    Avs = []
    Ahs = []
    for hs in groups_agg:
        Avs.extend([map_agg[ha] for ha in hs])
        Ahs.extend(hs)

    n, D, vs, hs = ("Dent", A, Avs, Ahs)
    corrs = numpy.corrcoef(D[:,vs].T)
    
    nz = len(vs)
    fig, axes = plt.subplots(len(vs)-1, nz-1, figsize=(FSIZE*nz, FSIZE*nz), 
                             sharex='col', sharey='row', gridspec_kw={'hspace': 0, 'wspace': 0})
    plot_corrs(D, corrs, vs, hs, axes)
    mk_plot("correlate%s" % n)

Climate variables

In [4]:
if DO_CORRELATIONS:
 
    Bvs = []
    Bhs = []
    for hs in groups_bio:
        Bvs.extend([map_bio[hb] for hb in hs])
        Bhs.extend(hs)
    
    n, D, vs, hs = ("Clim", B_log, Bvs, Bhs)
    corrs = numpy.corrcoef(D[:,vs].T)

    nz = len(vs)
    fig, axes = plt.subplots(nz-1, nz-1, figsize=(FSIZE*nz, FSIZE*nz), 
                             sharex='col', sharey='row', gridspec_kw={'hspace': 0, 'wspace': 0})
    plot_corrs(D, corrs, vs, hs, axes)
    mk_plot("correlate%s" % n)

Projections

In [5]:
################################
def project_data(X, model, mname, head, dname, axes, di):
        U = numpy.eye(X.shape[1], X.shape[1])
        if mname == "PCA":
            model.fit(X)
            Xproj = model.transform(X)
            Uproj = model.transform(U)
        else:
            Xproj = model.fit_transform(numpy.vstack([X[::50, :], U]))
            Uproj = Xproj[-U.shape[0]:, :]
        plot_proj(Xproj, Uproj, head, dname, axes, di)
################################
if DO_PROJECTIONS:
    NCP=2
    datas = [("Dent", Akw, hA),
         ("Dent-{HOD, LOPT}", AMkw, hAM),
         ("Clim", BLkw, hB),
         ("Dent+Clim", numpy.hstack([Akw, BLkw]), hA+hB)
         ]
    models = [("PCA", decomposition.PCA(n_components=NCP))]#,
          # ("PCoA", manifold.MDS(n_components=NCP, metric=True)),
          # ("MDS", manifold.MDS(n_components=NCP, metric=False))]

    for mi, (mname, model) in enumerate(models):
        fig, axes = plt.subplots(2, len(datas), figsize=(FSIZE*len(datas), FSIZE*2))
        for di, (dname, X, head) in enumerate(datas):        
            project_data(X, model, mname, head, dname, axes, di)
            U = numpy.eye(X.shape[1], X.shape[1])        
        mk_plot("projection%s" % mname)

Regressions

One variable

In [6]:
################################
def regress_one_groups(gA, gB, A, head_A, map_A, B, head_B, map_B, collect={}, axes=None):
    for i, hb in enumerate(gB):
        if hb not in collect:
            collect[hb] = {}
        for j, ha in enumerate(gA):

            ai = map_A[ha]
            bi = map_B[hb]
            
            ##################################
            # y = A[:,ai]
            # X = B[:,[bi]]
            # defv = def_vals.get(ha, 0)
            # if defv is not None:
            #     ### drop rows with agg==0
            #     X = X[y!=defv, :]
            #     y = y[y!=defv]                
            ##################################
            X = A[:,[ai]]
            y = B[:,bi]
                            
            Xr = sm.add_constant(X)
            # model = sm.OLS(y, Xr)
            model = sm.GLS(y, Xr)
            results = model.fit()

            if axes is not None:
                plot_reg_one(X, y, results, ai, head_A, bi, head_B, axes, i, j)
            collect[hb][ha] = results 
    return collect
################################
In [7]:
if DO_REGRESSIONS:
    TOPR = 3
    collect = {}
    gbi, gai = (0, 0)
    gB = groups_bio[gbi]
    gA = groups_agg[gai]
    nx, ny = len(gA), len(gB)
    # ny, nx = len(gA), len(gB)
    fig, axes = plt.subplots(ny, nx, figsize=(FSIZE*nx, FSIZE*ny), 
                             sharex='col', sharey='row', gridspec_kw={'hspace': 0, 'wspace': 0})        
    regress_one_groups(gA, gB, A, head_agg, map_agg, B_log, head_bio, map_bio, collect, axes)
    mk_plot("regression1VDent%dvClim%d" % (gai, gbi))
In [8]:
if DO_REGRESSIONS:
    gbi, gai = (0, 1)
    gB = groups_bio[gbi]
    gA = groups_agg[gai]
    nx, ny = len(gA), len(gB)
    # ny, nx = len(gA), len(gB)
    fig, axes = plt.subplots(ny, nx, figsize=(FSIZE*nx, FSIZE*ny), 
                             sharex='col', sharey='row', gridspec_kw={'hspace': 0, 'wspace': 0})        
    regress_one_groups(gA, gB, A, head_agg, map_agg, B_log, head_bio, map_bio, collect, axes)
    mk_plot("regression1VDent%dvClim%d" % (gai, gbi))
In [9]:
if DO_REGRESSIONS:
    gbi, gai = (1, 0)
    gB = groups_bio[gbi]
    gA = groups_agg[gai]
    nx, ny = len(gA), len(gB)
    # ny, nx = len(gA), len(gB)
    fig, axes = plt.subplots(ny, nx, figsize=(FSIZE*nx, FSIZE*ny), 
                             sharex='col', sharey='row', gridspec_kw={'hspace': 0, 'wspace': 0})        
    regress_one_groups(gA, gB, A, head_agg, map_agg, B_log, head_bio, map_bio, collect, axes)
    mk_plot("regression1VDent%dvClim%d" % (gai, gbi))
In [10]:
if DO_REGRESSIONS:
    gbi, gai = (1, 1)
    gB = groups_bio[gbi]
    gA = groups_agg[gai]
    nx, ny = len(gA), len(gB)
    # ny, nx = len(gA), len(gB)
    fig, axes = plt.subplots(ny, nx, figsize=(FSIZE*nx, FSIZE*ny), 
                             sharex='col', sharey='row', gridspec_kw={'hspace': 0, 'wspace': 0})        
    regress_one_groups(gA, gB, A, head_agg, map_agg, B_log, head_bio, map_bio, collect, axes)
    mk_plot("regression1VDent%dvClim%d" % (gai, gbi))
In [11]:
if DO_REGRESSIONS:
    gbi, gai = (2, 0)
    gB = groups_bio[gbi]
    gA = groups_agg[gai]
    nx, ny = len(gA), len(gB)
    # ny, nx = len(gA), len(gB)
    fig, axes = plt.subplots(ny, nx, figsize=(FSIZE*nx, FSIZE*ny), 
                             sharex='col', sharey='row', gridspec_kw={'hspace': 0, 'wspace': 0})        
    regress_one_groups(gA, gB, A, head_agg, map_agg, B_log, head_bio, map_bio, collect, axes)
    mk_plot("regression1VDent%dvClim%d" % (gai, gbi))
In [12]:
if DO_REGRESSIONS:
    gbi, gai = (2, 1)
    gB = groups_bio[gbi]
    gA = groups_agg[gai]
    nx, ny = len(gA), len(gB)
    # ny, nx = len(gA), len(gB)
    fig, axes = plt.subplots(ny, nx, figsize=(FSIZE*nx, FSIZE*ny), 
                             sharex='col', sharey='row', gridspec_kw={'hspace': 0, 'wspace': 0})        
    regress_one_groups(gA, gB, A, head_agg, map_agg, B_log, head_bio, map_bio, collect, axes)
    mk_plot("regression1VDent%dvClim%d" % (gai, gbi))
In [13]:
if DO_REGRESSIONS:
    gbi, gai = (3, 0)
    gB = groups_bio[gbi]
    gA = groups_agg[gai]
    nx, ny = len(gA), len(gB)
    # ny, nx = len(gA), len(gB)
    fig, axes = plt.subplots(ny, nx, figsize=(FSIZE*nx, FSIZE*ny), 
                             sharex='col', sharey='row', gridspec_kw={'hspace': 0, 'wspace': 0})        
    regress_one_groups(gA, gB, A, head_agg, map_agg, B_log, head_bio, map_bio, collect, axes)
    mk_plot("regression1VDent%dvClim%d" % (gai, gbi))
In [14]:
if DO_REGRESSIONS:
    gbi, gai = (3, 1)
    gB = groups_bio[gbi]
    gA = groups_agg[gai]
    nx, ny = len(gA), len(gB)
    # ny, nx = len(gA), len(gB)
    fig, axes = plt.subplots(ny, nx, figsize=(FSIZE*nx, FSIZE*ny), 
                             sharex='col', sharey='row', gridspec_kw={'hspace': 0, 'wspace': 0})        
    regress_one_groups(gA, gB, A, head_agg, map_agg, B_log, head_bio, map_bio, collect, axes)
    mk_plot("regression1VDent%dvClim%d" % (gai, gbi))

Multiple variables

In [15]:
if DO_REGRESSIONS:
    fo = mk_out("regressionModels.txt")
    for hb, xps in collect.items():
        bi = map_bio[hb]
        y = B_log[:, bi]
        fo.write("\n# %s\n##############\n" % hb)
        
        ks = sorted(xps.keys(), key=lambda x: regres_score(xps[x]))
        fo.write("\n".join(["\t%s\t%s" % (v, regres_log(xps[v], [v], "")) for v in ks[:TOPR]])+"\n")
        best_score = regres_score(xps[ks[0]])
        
        new_xps = {}
        new_compat = {}
        for i, ki in enumerate(ks):
            for j, kj in enumerate(ks[:i]):
    
                pairH = (kj, ki)
                pair = (map_agg[kj], map_agg[ki])
                X = A[:, pair]            
                
                Xr = sm.add_constant(X)
                # model = sm.OLS(y, Xr)
                model = sm.GLS(y, Xr)
                results = model.fit()
                if regres_score(results) < best_score:
                    new_xps[pairH] = results
                    for vp in [0,1]:
                        if pairH[vp] not in new_compat:
                            new_compat[pairH[vp]] = set()
                        new_compat[pairH[vp]].add(pairH[1-vp])
    
        while len(new_xps) > 0:
            xps = new_xps
            compat = new_compat
            new_xps = {}
            new_compat = {}
            seen = set()
            ks = sorted(xps.keys(), key=lambda x: regres_score(xps[x]))
            fo.write("--------- (%d)\n" % len(ks))
            fo.write("\n".join(["\t%s\t%s" % ("+".join(v), regres_log(xps[v], v, "")) for v in ks[:TOPR]])+"\n")
            best_score = regres_score(xps[ks[0]])
            for k in ks:
                common = set.intersection(*[compat[kk] for kk in k])
                for c in common:
    
                    vrsH = tuple(sorted([c]+list(k)))
                    if vrsH not in seen:
                        seen.add(vrsH)
                        
                        vrs = [map_agg[v] for v in vrsH]
                        X = A[:, vrs]            
    
                        Xr = sm.add_constant(X)
                        # model = sm.OLS(y, Xr)
                        model = sm.GLS(y, Xr)
                        results = model.fit()
                        if regres_score(results) < best_score:
                            new_xps[vrsH] = results
                            for vi, vp in enumerate(vrsH):
                                if vp not in new_compat:
                                    new_compat[vp] = set()
                                new_compat[vp].update(vrsH[:vi])
                            new_compat[vp].update(vrsH[vi+1:])
    close_out(fo)
# bioX1:NPP
##############
	MEAN_OO	AIC=66306 MSE=194233.982899 F=6205 LL=-33151	[1858.01\,  -2182.14\, MEAN_OO]
	MEAN_LOP	AIC=66524 MSE=204061.777222 F=5694 LL=-33260	[2621.35\,  -1066.06\, MEAN_LOP]
	MEAN_OL	AIC=66643 MSE=209597.271448 F=5427 LL=-33319	[2639.95\,  -2146.35\, MEAN_OL]
--------- (16)
	MEAN_HYP+MEAN_AL	AIC=65595 MSE=165314.064868 F=4032 LL=-32794	[3570.16\,  -1108.86\, MEAN_HYP -1271.78\, MEAN_AL]
	MEAN_OO+MEAN_LOP	AIC=65604 MSE=165648.46038 F=4019 LL=-32799	[2328.04\,  -1305.20\, MEAN_OO -560.33\, MEAN_LOP]
	MEAN_HYP+MEAN_SF	AIC=65615 MSE=166037.2046 F=4005 LL=-32804	[2805.93\,  -926.21\, MEAN_HYP +1115.25\, MEAN_SF]
--------- (6)
	MEAN_HYP+MEAN_LOP+MEAN_SF	AIC=65258 MSE=153122.739707 F=3019 LL=-32625	[2799.18\,  -610.83\, MEAN_HYP -436.03\, MEAN_LOP +939.48\, MEAN_SF]
	MEAN_HYP+MEAN_OL+MEAN_SF	AIC=65344 MSE=156112.445083 F=2933 LL=-32668	[2806.50\,  -641.10\, MEAN_HYP -794.58\, MEAN_OL +961.15\, MEAN_SF]
	MEAN_AL+MEAN_HYP+MEAN_OO	AIC=65402 MSE=158208.574451 F=2875 LL=-32697	[3122.14\,  -1027.17\, MEAN_AL -780.68\, MEAN_HYP -777.29\, MEAN_OO]
--------- (1)
	MEAN_HYP+MEAN_LOP+MEAN_OO+MEAN_SF	AIC=65224 MSE=151919.599507 F=2291 LL=-32607	[3013.64\,  -851.12\, MEAN_HYP -431.12\, MEAN_LOP +624.43\, MEAN_OO +1291.48\, MEAN_SF]

# bio15:PSeason
##############
	MEAN_AL	AIC=-7452 MSE=0.0108248892488 F=2220 LL=3728	[2.04\,  -0.39\, MEAN_AL]
	MEAN_HYP	AIC=-6908 MSE=0.0122436529641 F=1451 LL=3456	[1.75\,  +0.12\, MEAN_HYP]
	MEAN_SF	AIC=-6658 MSE=0.0129556645199 F=1129 LL=3331	[1.91\,  +0.30\, MEAN_SF]
--------- (15)
	MEAN_HYP+MEAN_SF	AIC=-8225 MSE=0.00908477338259 F=1746 LL=4115	[1.70\,  +0.11\, MEAN_HYP +0.29\, MEAN_SF]
	MEAN_SF+MEAN_OO	AIC=-8208 MSE=0.00911922480577 F=1731 LL=4107	[1.81\,  +0.46\, MEAN_SF +0.29\, MEAN_OO]
	MEAN_AL+MEAN_HYP	AIC=-7964 MSE=0.00963706784814 F=1519 LL=3985	[1.89\,  -0.30\, MEAN_AL +0.07\, MEAN_HYP]
--------- (4)
	MEAN_AL+MEAN_HYP+MEAN_SF	AIC=-8442 MSE=0.00864634260327 F=1298 LL=4225	[1.79\,  -0.16\, MEAN_AL +0.09\, MEAN_HYP +0.21\, MEAN_SF]
	MEAN_AL+MEAN_OO+MEAN_SF	AIC=-8375 MSE=0.00877879039755 F=1256 LL=4191	[1.87\,  -0.14\, MEAN_AL +0.23\, MEAN_OO +0.34\, MEAN_SF]
	MEAN_AL+MEAN_HYP+MEAN_OO	AIC=-8301 MSE=0.00892672677039 F=1211 LL=4154	[1.75\,  -0.23\, MEAN_AL +0.17\, MEAN_HYP -0.25\, MEAN_OO]

# bio12:PTotY
##############
	MEAN_OO	AIC=563 MSE=0.0664828422143 F=6888 LL=-279	[3.22\,  -1.35\, MEAN_OO]
	MEAN_HYP	AIC=1016 MSE=0.0736677463295 F=5785 LL=-506	[3.91\,  -0.56\, MEAN_HYP]
	MEAN_LOP	AIC=1645 MSE=0.0849392174385 F=4432 LL=-820	[3.63\,  -0.61\, MEAN_LOP]
--------- (12)
	MEAN_HYP+MEAN_SF	AIC=-266 MSE=0.0550890165023 F=4613 LL=136	[3.80\,  -0.57\, MEAN_HYP +0.71\, MEAN_SF]
	MEAN_OO+MEAN_ETH	AIC=252 MSE=0.0619540192825 F=3857 LL=-123	[3.38\,  -1.54\, MEAN_OO -0.94\, MEAN_ETH]
	MEAN_OO+MEAN_LOP	AIC=268 MSE=0.0621803100987 F=3835 LL=-131	[3.40\,  -1.00\, MEAN_OO -0.22\, MEAN_LOP]
--------- (1)
	MEAN_HYP+MEAN_OO+MEAN_SF	AIC=-336 MSE=0.0542100111017 F=3149 LL=172	[3.98\,  -0.77\, MEAN_HYP +0.53\, MEAN_OO +1.01\, MEAN_SF]

# bio13:PWetM
##############
	MEAN_OO	AIC=328 MSE=0.0630419985177 F=5548 LL=-162	[2.55\,  -1.18\, MEAN_OO]
	MEAN_LOP	AIC=1194 MSE=0.0767010634942 F=3774 LL=-595	[2.91\,  -0.53\, MEAN_LOP]
	MEAN_OL	AIC=1263 MSE=0.0779026751173 F=3647 LL=-629	[2.92\,  -1.07\, MEAN_OL]
--------- (12)
	MEAN_HYP+MEAN_SF	AIC=-491 MSE=0.0523424299907 F=3792 LL=248	[2.92\,  -0.45\, MEAN_HYP +0.90\, MEAN_SF]
	MEAN_OO+MEAN_AL	AIC=15 MSE=0.0587174186435 F=3141 LL=-4	[2.63\,  -1.23\, MEAN_OO -0.36\, MEAN_AL]
	MEAN_OO+MEAN_OT	AIC=44 MSE=0.0591033639202 F=3106 LL=-19	[2.53\,  -0.96\, MEAN_OO -1.17\, MEAN_OT]

# bio16:PWetQ
##############
	MEAN_OO	AIC=669 MSE=0.0681022565721 F=5617 LL=-332	[2.96\,  -1.23\, MEAN_OO]
	MEAN_LOP	AIC=1617 MSE=0.0844170429813 F=3678 LL=-806	[3.33\,  -0.55\, MEAN_LOP]
	MEAN_OL	AIC=1685 MSE=0.0857157817783 F=3556 LL=-840	[3.34\,  -1.11\, MEAN_OL]
--------- (12)
	MEAN_HYP+MEAN_SF	AIC=-114 MSE=0.0570182200079 F=3784 LL=60	[3.38\,  -0.48\, MEAN_HYP +0.90\, MEAN_SF]
	MEAN_OO+MEAN_OT	AIC=373 MSE=0.063677175864 F=3157 LL=-183	[2.95\,  -1.00\, MEAN_OO -1.24\, MEAN_OT]
	MEAN_OO+MEAN_ETH	AIC=378 MSE=0.0637507549044 F=3151 LL=-186	[3.12\,  -1.42\, MEAN_OO -0.92\, MEAN_ETH]
--------- (1)
	MEAN_HYP+MEAN_OO+MEAN_SF	AIC=-200 MSE=0.0559017215614 F=2602 LL=104	[3.58\,  -0.71\, MEAN_HYP +0.60\, MEAN_OO +1.23\, MEAN_SF]

# bio14:PDryM
##############
	MEAN_HYP	AIC=3836 MSE=0.139527373305 F=3027 LL=-1916	[1.75\,  -0.56\, MEAN_HYP]
	MEAN_AL	AIC=5023 MSE=0.182526557125 F=1274 LL=-2509	[0.49\,  +1.22\, MEAN_AL]
	MEAN_OO	AIC=5210 MSE=0.19042949401 F=1038 LL=-2603	[0.95\,  -0.88\, MEAN_OO]
--------- (9)
	MEAN_HYP+MEAN_OL	AIC=3263 MSE=0.122506066678 F=2031 LL=-1628	[1.78\,  -0.92\, MEAN_HYP +1.00\, MEAN_OL]
	MEAN_HYP+MEAN_LOP	AIC=3306 MSE=0.123708632331 F=1990 LL=-1650	[1.79\,  -0.89\, MEAN_HYP +0.46\, MEAN_LOP]
	MEAN_HYP+MEAN_OO	AIC=3442 MSE=0.127578694455 F=1862 LL=-1718	[2.14\,  -0.90\, MEAN_HYP +0.90\, MEAN_OO]

# bio17:PDryQ
##############
	MEAN_HYP	AIC=3897 MSE=0.141453452921 F=4055 LL=-1946	[2.52\,  -0.65\, MEAN_HYP]
	MEAN_OO	AIC=5111 MSE=0.186204134838 F=2019 LL=-2553	[1.64\,  -1.22\, MEAN_OO]
	MEAN_OL	AIC=5510 MSE=0.203838115482 F=1463 LL=-2753	[2.02\,  -1.10\, MEAN_OL]
--------- (10)
	MEAN_HYP+MEAN_OL	AIC=3730 MSE=0.13619200461 F=2191 LL=-1862	[2.54\,  -0.85\, MEAN_HYP +0.56\, MEAN_OL]
	MEAN_HYP+MEAN_LOP	AIC=3760 MSE=0.137100714976 F=2162 LL=-1877	[2.54\,  -0.83\, MEAN_HYP +0.24\, MEAN_LOP]
	MEAN_HYP+MEAN_ETH	AIC=3791 MSE=0.138068554723 F=2131 LL=-1892	[2.76\,  -0.73\, MEAN_HYP -0.82\, MEAN_ETH]

# bio19:PColdQ
##############
	MEAN_HYP	AIC=4751 MSE=0.171629675558 F=3278 LL=-2373	[2.66\,  -0.65\, MEAN_HYP]
	MEAN_OO	AIC=5199 MSE=0.189956706291 F=2536 LL=-2597	[1.83\,  -1.38\, MEAN_OO]
	MEAN_LOP	AIC=5688 MSE=0.212209166016 F=1807 LL=-2842	[2.23\,  -0.61\, MEAN_LOP]
--------- (8)
	MEAN_HYP+MEAN_AL	AIC=4673 MSE=0.168578292602 F=1709 LL=-2333	[2.81\,  -0.70\, MEAN_HYP -0.33\, MEAN_AL]
	MEAN_HYP+MEAN_SF	AIC=4688 MSE=0.169185692325 F=1695 LL=-2341	[2.62\,  -0.65\, MEAN_HYP +0.26\, MEAN_SF]
	MEAN_HYP+MEAN_OO	AIC=4702 MSE=0.169725460356 F=1683 LL=-2348	[2.50\,  -0.51\, MEAN_HYP -0.36\, MEAN_OO]

# bio18:PWarmQ
##############
	MEAN_HYP	AIC=1495 MSE=0.0821031095034 F=3487 LL=-745	[3.29\,  -0.46\, MEAN_HYP]
	MEAN_OO	AIC=2712 MSE=0.108164504299 F=1583 LL=-1354	[2.65\,  -0.82\, MEAN_OO]
	MEAN_LOP	AIC=2721 MSE=0.108389550781 F=1570 LL=-1358	[2.95\,  -0.41\, MEAN_LOP]
--------- (8)
	MEAN_HYP+MEAN_ETH	AIC=1135 MSE=0.0756616284185 F=2080 LL=-564	[3.60\,  -0.57\, MEAN_HYP -1.12\, MEAN_ETH]
	MEAN_HYP+MEAN_OO	AIC=1386 MSE=0.080084150534 F=1843 LL=-690	[3.45\,  -0.60\, MEAN_HYP +0.37\, MEAN_OO]
	MEAN_HYP+MEAN_HOD	AIC=1445 MSE=0.0811685343195 F=1789 LL=-719	[2.58\,  -0.46\, MEAN_HYP +0.70\, MEAN_HOD]

# bio4:TSeason
##############
	MEAN_LOP	AIC=58951 MSE=36722.0942511 F=3403 LL=-29473	[139.87\,  +349.62\, MEAN_LOP]
	MEAN_OL	AIC=59055 MSE=37599.738358 F=3220 LL=-29525	[136.02\,  +700.31\, MEAN_OL]
	MEAN_OO	AIC=59909 MSE=45616.2801319 F=1879 LL=-29952	[424.29\,  +581.91\, MEAN_OO]
--------- (18)
	MEAN_LOP+MEAN_SF	AIC=57589 MSE=26971.5952478 F=3115 LL=-28791	[264.14\,  +320.14\, MEAN_LOP -522.33\, MEAN_SF]
	MEAN_OL+MEAN_SF	AIC=57701 MSE=27667.1073568 F=2981 LL=-28847	[261.35\,  +641.30\, MEAN_OL -526.92\, MEAN_SF]
	MEAN_LOP+MEAN_ETH	AIC=57998 MSE=29589.6868725 F=2644 LL=-28996	[-113.37\,  +451.43\, MEAN_LOP +1109.91\, MEAN_ETH]
--------- (1)
	MEAN_LOP+MEAN_OL+MEAN_SF	AIC=57539 MSE=26660.8802852 F=2118 LL=-28765	[277.98\,  +722.66\, MEAN_LOP -822.17\, MEAN_OL -519.28\, MEAN_SF]

# bio7:TRngY
##############
	MEAN_LOP	AIC=27377 MSE=28.8277509381 F=4644 LL=-13686	[14.54\,  +11.44\, MEAN_LOP]
	MEAN_OL	AIC=27496 MSE=29.6122268363 F=4404 LL=-13746	[14.38\,  +22.98\, MEAN_OL]
	MEAN_OO	AIC=28303 MSE=35.5489923459 F=2932 LL=-14149	[23.53\,  +20.29\, MEAN_OO]
--------- (15)
	MEAN_LOP+MEAN_ETH	AIC=26686 MSE=24.6473471062 F=3091 LL=-13340	[8.41\,  +13.91\, MEAN_LOP +26.88\, MEAN_ETH]
	MEAN_OL+MEAN_ETH	AIC=26799 MSE=25.2816569792 F=2958 LL=-13396	[7.95\,  +28.23\, MEAN_OL +27.54\, MEAN_ETH]
	MEAN_LOP+MEAN_SF	AIC=26949 MSE=26.1595202583 F=2784 LL=-13471	[16.60\,  +10.96\, MEAN_LOP -8.65\, MEAN_SF]
--------- (1)
	MEAN_ETH+MEAN_LOP+MEAN_OL	AIC=26680 MSE=24.6059325857 F=2067 LL=-13336	[8.69\,  +26.43\, MEAN_ETH +18.83\, MEAN_LOP -10.13\, MEAN_OL]

# bio2:TMeanRngD
##############
	MEAN_HYP	AIC=17516 MSE=3.09021061966 F=4448 LL=-8756	[5.49\,  +3.20\, MEAN_HYP]
	MEAN_OO	AIC=18484 MSE=3.84756393419 F=2704 LL=-9240	[9.70\,  +6.41\, MEAN_OO]
	MEAN_LOP	AIC=19158 MSE=4.4823711213 F=1696 LL=-9577	[7.96\,  +2.73\, MEAN_LOP]
--------- (8)
	MEAN_HYP+MEAN_OT	AIC=17305 MSE=2.94568522663 F=2442 LL=-8649	[4.52\,  +3.87\, MEAN_HYP -7.57\, MEAN_OT]
	MEAN_HYP+MEAN_CM	AIC=17309 MSE=2.9484247224 F=2438 LL=-8651	[4.60\,  +3.82\, MEAN_HYP -7.15\, MEAN_CM]
	MEAN_HYP+MEAN_ETH	AIC=17390 MSE=3.00236911634 F=2354 LL=-8692	[4.30\,  +3.59\, MEAN_HYP +4.16\, MEAN_ETH]

# bio3:TIso
##############
	MEAN_AL	AIC=31564 MSE=74.3987935565 F=1170 LL=-15780	[45.24\,  -23.62\, MEAN_AL]
	MEAN_LOP	AIC=31790 MSE=78.311639541 F=891 LL=-15893	[50.94\,  -8.26\, MEAN_LOP]
	MEAN_OL	AIC=31849 MSE=79.360112265 F=821 LL=-15922	[50.85\,  -16.25\, MEAN_OL]
--------- (16)
	MEAN_LOP+MEAN_HYP	AIC=30569 MSE=59.3796471988 F=1292 LL=-15281	[41.77\,  -21.46\, MEAN_LOP +13.97\, MEAN_HYP]
	MEAN_AL+MEAN_LOP	AIC=30662 MSE=60.6461394752 F=1218 LL=-15328	[54.56\,  -22.41\, MEAN_AL -7.72\, MEAN_LOP]
	MEAN_OL+MEAN_HYP	AIC=30680 MSE=60.8852467653 F=1205 LL=-15337	[42.16\,  -43.60\, MEAN_OL +14.09\, MEAN_HYP]
--------- (3)
	MEAN_AL+MEAN_ETH+MEAN_LOP	AIC=30008 MSE=52.2830115062 F=1178 LL=-15000	[62.93\,  -19.70\, MEAN_AL -38.59\, MEAN_ETH -11.33\, MEAN_LOP]
	MEAN_AL+MEAN_ETH+MEAN_OL	AIC=30073 MSE=53.0553270477 F=1139 LL=-15032	[63.17\,  -19.93\, MEAN_AL -38.78\, MEAN_ETH -22.78\, MEAN_OL]
	MEAN_AL+MEAN_HYP+MEAN_LOP	AIC=30518 MSE=58.6771761332 F=889 LL=-15255	[46.40\,  -9.15\, MEAN_AL +9.17\, MEAN_HYP -16.71\, MEAN_LOP]

# bio1:TMeanY
##############
	MEAN_OO	AIC=29942 MSE=51.5278068581 F=6748 LL=-14969	[24.98\,  -37.06\, MEAN_OO]
	MEAN_LOP	AIC=30123 MSE=53.6841904564 F=6299 LL=-15059	[38.04\,  -18.19\, MEAN_LOP]
	MEAN_OL	AIC=30143 MSE=53.9336235256 F=6250 LL=-15069	[38.56\,  -36.95\, MEAN_OL]
--------- (20)
	MEAN_OL+MEAN_SF	AIC=26288 MSE=22.5217915701 F=10562 LL=-13141	[31.52\,  -33.63\, MEAN_OL +29.63\, MEAN_SF]
	MEAN_LOP+MEAN_SF	AIC=26306 MSE=22.6123981437 F=10511 LL=-13150	[31.03\,  -16.52\, MEAN_LOP +29.48\, MEAN_SF]
	MEAN_OO+MEAN_AL	AIC=27375 MSE=28.8073241142 F=7776 LL=-13684	[30.84\,  -40.72\, MEAN_OO -25.78\, MEAN_AL]
--------- (2)
	MEAN_OL+MEAN_OO+MEAN_SF	AIC=26242 MSE=22.2851178622 F=7132 LL=-13117	[30.99\,  -30.81\, MEAN_OL -3.81\, MEAN_OO +28.06\, MEAN_SF]
	MEAN_LOP+MEAN_OO+MEAN_SF	AIC=26259 MSE=22.3664612615 F=7101 LL=-13125	[30.53\,  -15.11\, MEAN_LOP -3.88\, MEAN_OO +27.90\, MEAN_SF]

# bio5:TMaxWarmM
##############
	MEAN_OO	AIC=29715 MSE=48.9446828936 F=3871 LL=-14855	[36.00\,  -27.36\, MEAN_OO]
	MEAN_OL	AIC=30323 MSE=56.1742094493 F=2805 LL=-15159	[44.77\,  -25.26\, MEAN_OL]
	MEAN_LOP	AIC=30346 MSE=56.4670213813 F=2767 LL=-15171	[44.33\,  -12.36\, MEAN_LOP]
--------- (18)
	MEAN_OL+MEAN_SF	AIC=27733 MSE=31.2366770619 F=4284 LL=-13863	[38.50\,  -22.31\, MEAN_OL +26.40\, MEAN_SF]
	MEAN_LOP+MEAN_SF	AIC=27795 MSE=31.6819767664 F=4193 LL=-13894	[38.06\,  -10.88\, MEAN_LOP +26.33\, MEAN_SF]
	MEAN_OO+MEAN_AL	AIC=28135 MSE=34.2172790808 F=3719 LL=-14064	[40.72\,  -30.31\, MEAN_OO -20.76\, MEAN_AL]
--------- (2)
	MEAN_AL+MEAN_OO+MEAN_OT	AIC=27491 MSE=29.5687702928 F=3101 LL=-13741	[40.40\,  -21.90\, MEAN_AL -22.87\, MEAN_OO -40.42\, MEAN_OT]
	MEAN_OL+MEAN_OO+MEAN_SF	AIC=27694 MSE=30.9567103969 F=2896 LL=-13843	[37.92\,  -19.23\, MEAN_OL -4.15\, MEAN_OO +24.70\, MEAN_SF]

# bio10:TMeanWarmQuarter
##############
	MEAN_OO	AIC=28667 MSE=38.6093242974 F=5641 LL=-14331	[29.92\,  -29.33\, MEAN_OO]
	MEAN_OL	AIC=29430 MSE=45.8891983326 F=4046 LL=-14713	[39.54\,  -27.42\, MEAN_OL]
	MEAN_LOP	AIC=29454 MSE=46.1371734813 F=4000 LL=-14725	[39.08\,  -13.44\, MEAN_LOP]
--------- (14)
	MEAN_OL+MEAN_SF	AIC=27011 MSE=26.5267100444 F=5111 LL=-13502	[34.01\,  -24.82\, MEAN_OL +23.26\, MEAN_SF]
	MEAN_LOP+MEAN_SF	AIC=27078 MSE=26.9344707512 F=5000 LL=-13536	[33.57\,  -12.13\, MEAN_LOP +23.18\, MEAN_SF]
	MEAN_OO+MEAN_AL	AIC=27270 MSE=28.127895955 F=4694 LL=-13632	[33.91\,  -31.82\, MEAN_OO -17.51\, MEAN_AL]
--------- (2)
	MEAN_OL+MEAN_OO+MEAN_SF	AIC=26832 MSE=25.4675120401 F=3610 LL=-13412	[32.90\,  -18.89\, MEAN_OL -8.00\, MEAN_OO +19.98\, MEAN_SF]
	MEAN_LOP+MEAN_OO+MEAN_SF	AIC=26885 MSE=25.7733015443 F=3550 LL=-13438	[32.49\,  -9.08\, MEAN_LOP -8.37\, MEAN_OO +19.78\, MEAN_SF]

# bio6:TMinColdM
##############
	MEAN_LOP	AIC=31674 MSE=76.2791032783 F=7597 LL=-15835	[29.79\,  -23.81\, MEAN_LOP]
	MEAN_OO	AIC=31736 MSE=77.3462595335 F=7431 LL=-15866	[12.47\,  -47.65\, MEAN_OO]
	MEAN_OL	AIC=31736 MSE=77.3569130512 F=7430 LL=-15866	[30.40\,  -48.25\, MEAN_OL]
--------- (19)
	MEAN_LOP+MEAN_SF	AIC=27913 MSE=32.5353413701 F=11873 LL=-13953	[21.47\,  -21.83\, MEAN_LOP +34.98\, MEAN_SF]
	MEAN_OL+MEAN_SF	AIC=27979 MSE=33.0273585018 F=11664 LL=-13986	[22.03\,  -44.30\, MEAN_OL +35.19\, MEAN_SF]
	MEAN_OO+MEAN_AL	AIC=29143 MSE=42.9934322196 F=8448 LL=-14568	[19.67\,  -52.15\, MEAN_OO -31.70\, MEAN_AL]
--------- (2)
	MEAN_HYP+MEAN_LOP+MEAN_SF	AIC=27746 MSE=31.3231570508 F=8279 LL=-13869	[23.51\,  -3.66\, MEAN_HYP -18.29\, MEAN_LOP +36.47\, MEAN_SF]
	MEAN_LOP+MEAN_OO+MEAN_SF	AIC=27852 MSE=32.0818707071 F=8048 LL=-13922	[20.79\,  -19.92\, MEAN_LOP -5.26\, MEAN_OO +32.84\, MEAN_SF]

# bio11:TMeanColdQ
##############
	MEAN_LOP	AIC=31565 MSE=74.4151016733 F=6941 LL=-15780	[35.56\,  -22.48\, MEAN_LOP]
	MEAN_OL	AIC=31627 MSE=75.473034969 F=6782 LL=-15811	[36.13\,  -45.53\, MEAN_OL]
	MEAN_OO	AIC=31854 MSE=79.4415991409 F=6222 LL=-15925	[19.01\,  -44.19\, MEAN_OO]
--------- (17)
	MEAN_LOP+MEAN_SF	AIC=27140 MSE=27.3127660282 F=13262 LL=-13567	[26.93\,  -20.43\, MEAN_LOP +36.29\, MEAN_SF]
	MEAN_OL+MEAN_SF	AIC=27217 MSE=27.7925982287 F=12995 LL=-13605	[27.45\,  -41.44\, MEAN_OL +36.50\, MEAN_SF]
	MEAN_OO+MEAN_AL	AIC=28738 MSE=39.2240516235 F=8564 LL=-14366	[26.81\,  -49.06\, MEAN_OO -34.30\, MEAN_AL]
--------- (1)
	MEAN_HYP+MEAN_LOP+MEAN_SF	AIC=27128 MSE=27.2358810651 F=8871 LL=-13560	[27.46\,  -0.95\, MEAN_HYP -19.50\, MEAN_LOP +36.69\, MEAN_SF]

# bio8:TMeanWetQ
##############
	MEAN_OO	AIC=28086 MSE=33.8440108995 F=5397 LL=-14041	[27.43\,  -26.86\, MEAN_OO]
	MEAN_OL	AIC=28468 MSE=36.9089341202 F=4582 LL=-14232	[36.90\,  -26.17\, MEAN_OL]
	MEAN_LOP	AIC=28481 MSE=37.0155426654 F=4556 LL=-14238	[36.48\,  -12.84\, MEAN_LOP]
--------- (18)
	MEAN_OL+MEAN_SF	AIC=26510 MSE=23.680278993 F=4804 LL=-13252	[32.33\,  -24.02\, MEAN_OL +19.23\, MEAN_SF]
	MEAN_LOP+MEAN_SF	AIC=26555 MSE=23.9249899128 F=4733 LL=-13274	[31.93\,  -11.76\, MEAN_LOP +19.14\, MEAN_SF]
	MEAN_OO+MEAN_AL	AIC=26945 MSE=26.1349266442 F=4146 LL=-13469	[30.85\,  -28.99\, MEAN_OO -15.02\, MEAN_AL]
--------- (2)
	MEAN_OL+MEAN_OO+MEAN_SF	AIC=26385 MSE=23.0146520906 F=3338 LL=-13188	[31.45\,  -19.31\, MEAN_OL -6.35\, MEAN_OO +16.63\, MEAN_SF]
	MEAN_LOP+MEAN_OO+MEAN_SF	AIC=26422 MSE=23.2088188922 F=3298 LL=-13207	[31.09\,  -9.37\, MEAN_LOP -6.58\, MEAN_OO +16.46\, MEAN_SF]

# bio9:TMeanDryQ
##############
	MEAN_LOP	AIC=32432 MSE=90.5573024393 F=5319 LL=-16214	[36.53\,  -21.71\, MEAN_LOP]
	MEAN_OL	AIC=32474 MSE=91.4309629953 F=5226 LL=-16235	[37.09\,  -43.99\, MEAN_OL]
	MEAN_OO	AIC=32621 MSE=94.5243759042 F=4911 LL=-16308	[20.58\,  -42.82\, MEAN_OO]
--------- (19)
	MEAN_LOP+MEAN_SF	AIC=28090 MSE=33.8698239709 F=10805 LL=-14042	[27.06\,  -19.46\, MEAN_LOP +39.82\, MEAN_SF]
	MEAN_OL+MEAN_SF	AIC=28126 MSE=34.1496036872 F=10699 LL=-14060	[27.57\,  -39.51\, MEAN_OL +40.00\, MEAN_SF]
	MEAN_OO+MEAN_AL	AIC=29193 MSE=43.4760797447 F=7930 LL=-14593	[29.36\,  -48.31\, MEAN_OO -38.64\, MEAN_AL]
--------- (3)
	MEAN_LOP+MEAN_OO+MEAN_SF	AIC=28052 MSE=33.5746387769 F=7280 LL=-14022	[27.60\,  -21.01\, MEAN_LOP +4.26\, MEAN_OO +41.55\, MEAN_SF]
	MEAN_LOP+MEAN_OT+MEAN_SF	AIC=28056 MSE=33.5993164213 F=7274 LL=-14024	[26.61\,  -18.74\, MEAN_LOP -9.41\, MEAN_OT +39.04\, MEAN_SF]
	MEAN_HYP+MEAN_LOP+MEAN_SF	AIC=28080 MSE=33.785681792 F=7225 LL=-14036	[26.49\,  +1.00\, MEAN_HYP -20.43\, MEAN_LOP +39.41\, MEAN_SF]

Clustering

In [16]:
ctypes = ["C:ones", "C:sizes", "C:wdist"]
nkmeans = "k-means"
linkage_args = [("Ward", {"method": "ward"}),
                ("Complete", {"method": "complete"}),
                # ("Single", {"method": "single"}),
                # ("Average", {"method": "average"}),
                # ("Centroid", {"method": "centroid"}),
                ("Weighted", {"method": "weighted"}),
                ("Median", {"method": "median"})]

collect_clusters = []
collect_clusters_names = []
linkZs = {} 

Redescriptions based clusters

In [17]:
if DO_CLUSTERS:
    
    for mi, ctype in enumerate(ctypes):
        for ni, nbc in enumerate(NBCS):
            keep_ids = C[:,map_clu["%s%d" % (ctype, nbc)]] > -1
            cluster_labels = C[keep_ids, map_clu["%s%d" % (ctype, nbc)]].astype(int)
            ccounts = numpy.bincount(cluster_labels)
            
            collect_clusters.append(C[:, map_clu["%s%d" % (ctype, nbc)]].astype(int))
            collect_clusters_names.append((ctype, nbc))

k-means clustering

In [18]:
if DO_CLUSTERS:
    
    meths_args = [(nkmeans, cluster.KMeans, {"n_clusters": nbc, "random_state": 10}) for nbc in NBCS]
                  # ("AP", cluster.AffinityPropagation, {"max_iter": 50})]
    
    for mi, (mname, meth, args) in enumerate(meths_args):
        cluster_labels = meth(**args).fit_predict(clu_data)
        ccounts = numpy.bincount(cluster_labels)[1:]
        
        collect_clusters.append(cluster_labels)
        collect_clusters_names.append((mname, args["n_clusters"]))

Hierarchical clustering

In [19]:
if DO_CLUSTERS:
    
    for li, (lname, args) in enumerate(linkage_args):
        Z = scipy.cluster.hierarchy.linkage(clu_data, **args)
        linkZs[lname] = Z
        
        for ni, nbc in enumerate(NBCS):
            cluster_labels = scipy.cluster.hierarchy.fcluster(Z, nbc, criterion="maxclust")
            ccounts = numpy.bincount(cluster_labels)[1:]
            
            collect_clusters.append(cluster_labels)
            collect_clusters_names.append((lname, nbc))

Ecoregions

In [20]:
if DO_CLUSTERS:
    fig, axes = plt.subplots(figsize=(1.5*FSIZE, 1.5*FSIZE))
    ecoregions_labels = C[:, map_clu["ECO_CODE"]].astype(int)
    
    axes, bm, bm_args = tools_geomap.prepare_map(map_prefs, map_corners[0]+map_corners[1], axe=axes)
    xs,ys = bm(lng_lat[:,0], lng_lat[:,1])
    axes.scatter(xs, ys, c=ecoregions_labels, s=5, edgecolors='none', zorder=10, alpha=.7)#, cmap=cmC)
    # axes.set_xlabel(" ".join(["%d" % len(ccounts)]+["%d" % i for i in ccounts]))
    axes.set_title(f"Ecoregions")
    mk_plot("clustersEcoregions")
/usr/lib/python3/dist-packages/mpl_toolkits/basemap/__init__.py:1704: MatplotlibDeprecationWarning: The axesPatch function was deprecated in version 2.1. Use Axes.patch instead.
  limb = ax.axesPatch
/usr/lib/python3/dist-packages/mpl_toolkits/basemap/__init__.py:1707: MatplotlibDeprecationWarning: The axesPatch function was deprecated in version 2.1. Use Axes.patch instead.
  if limb is not ax.axesPatch:

Comparing clusterings

In [21]:
if DO_CLUSTERS:
    
    nmi_meths = [ci for ci, nc in enumerate(collect_clusters_names) if nc[1] == NBC or nc[0] == nkmeans]
    nmi_meth_names = ["%s %d" % collect_clusters_names[ci] for ci in nmi_meths]+["ecoregions"]
    NMI = numpy.ones((len(nmi_meth_names), len(nmi_meth_names)))
    for ii, ci in enumerate(nmi_meths):
        for jj in range(ii):
            NMI[ii,jj] = metrics.normalized_mutual_info_score(collect_clusters[nmi_meths[ii]], collect_clusters[nmi_meths[jj]])
            NMI[jj,ii] = NMI[ii,jj]
        NMI[ii,-1] = metrics.normalized_mutual_info_score(collect_clusters[nmi_meths[ii]], ecoregions_labels)
        NMI[-1,ii] = NMI[ii,-1]

    fig, axes = plt.subplots(1,1, figsize=(2*FSIZE, 2*FSIZE))
    plt.imshow(NMI, cmap=cmB, vmin=-1, vmax=1)    
    plt.xticks(numpy.arange(len(nmi_meth_names)), nmi_meth_names, rotation=45)
    plt.yticks(numpy.arange(len(nmi_meth_names)), nmi_meth_names)
    axes.xaxis.set_ticks_position('top')
    mk_plot("clustersNMI")
In [22]:
if DO_CLUSTERS:
    
    clusters_sets = {}
    map_same = {}
    map_cc_cv = {}
    clusterings_info = {}
    for ci, cc in enumerate(collect_clusters):
        ck = collect_clusters_names[ci]
        clusterings_info[ck] = {"ci": ci, 
                                "silhouette": metrics.silhouette_score(clu_data, cc)}
        vals = set(cc)
        map_cc_cv[ck] = sorted(vals)
        for v in vals:
            ss = set(numpy.where(cc==v)[0])
            for okv, os in clusters_sets.items():
                if okv[0] != ck and len(ss) == len(os) and ss == os:
                    if okv not in map_same:                        
                        map_same[okv] = okv 
                    map_same[(ck, v)] = okv
                    break
            if (ck, v) not in map_same:
                clusters_sets[(ck, v)] = ss
    cks = sorted(clusters_sets.keys(), key=lambda x: len(clusters_sets[x]))
    
    contain = {}
    for ci in range(len(cks)):
        contain[cks[ci]] = []
        for cj in range(ci):
            if clusters_sets[cks[cj]].issubset(clusters_sets[cks[ci]]):
                contain[cks[ci]].append((len(clusters_sets[cks[cj]])/len(clusters_sets[cks[ci]]), cks[cj]))

    sub_parts = {}
    for k, oss in contain.items():
        if len(oss) > 1:
            subs = {}
            for fct, oks in oss:
                if oks[0] not in subs:
                    subs[oks[0]] = []
                subs[oks[0]].append(oks[1])
            X = [okk for okk, ss in subs.items() if len(ss) > 1]            
            for x in X:
                union_set = set().union(*[clusters_sets[(x, kk)] for kk in subs[x]])
                if clusters_sets[k] == union_set:
                    if k not in sub_parts:
                        sub_parts[k] = []
                    sub_parts[k].append([(x, kk) for kk in subs[x]])
            if k in sub_parts:
                if len(sub_parts[k]) > 1:
                    print("Multiple mappings", k, sub_parts[k])
                    # pdb.set_trace()
                sub_parts[k] = sub_parts[k][0]
                    
    sml_cks = [ck for ck in cks if ck not in sub_parts]
    
    dists = numpy.zeros((len(sml_cks), len(sml_cks)))
    for ci in range(len(sml_cks)):
        for cj in range(ci):
            dists[ci,cj] = 1-len(clusters_sets[sml_cks[ci]].intersection(clusters_sets[sml_cks[cj]]))/len(clusters_sets[sml_cks[ci]].union(clusters_sets[sml_cks[cj]]))
            dists[cj,ci] = dists[ci,cj]

    L = numpy.diag(dists.sum(axis=0)) - dists
    egval, egvect = eigsh(L, 2)
    ord_ks = numpy.argsort(egvect[:,1])#[-5:]

    dist_seq = numpy.array([0.]+[dists[ord_ks[i-1],ord_ks[i]] for i in range(1, len(ord_ks))])
    # dist_seq = numpy.array([0.]+[1*(dists[ord_ks[i-1],ord_ks[i]]>0) for i in range(1, len(ord_ks))])
    scaleOD = numpy.cumsum(dist_seq/numpy.sum(dist_seq))
    map_ccolor_scores = dict(zip([sml_cks[oi] for oi in ord_ks], scaleOD))

    clusters_info = {}
    for nc, cs in map_cc_cv.items():
        for ci in cs:
            ck = (nc, ci)
            cck = map_same.get(ck, ck)
            ccolor, parts = -1, []
            if cck in map_ccolor_scores:
                ccolor = map_ccolor_scores[cck]
            elif cck in sub_parts:
                parts = list(sub_parts[cck])
                pp_in = [pk for pk in parts if map_same.get(pk, pk) in map_ccolor_scores]
                pp_out = [pk for pk in parts if map_same.get(pk, pk) not in map_ccolor_scores]
                while len(pp_out) > 0:
                    pparts = []
                    for pk in pp_out:
                        pparts.extend(sub_parts[map_same.get(pk, pk)])
                            
                    pp_in.extend([pk for pk in pparts if map_same.get(pk, pk) in map_ccolor_scores])
                    pp_out = [pk for pk in pparts if map_same.get(pk, pk) not in map_ccolor_scores]
                collect_ccolor = [(map_ccolor_scores[pk], len(clusters_sets[pk])) for pk in pp_in]
                ccolor = sum([cc[0]*cc[1] for cc in collect_ccolor])/sum([cc[1] for cc in collect_ccolor])
                parts = pp_in
            else:
                print("%s NOT FOUND!!" % ck)
                
            clusters_info[ck] = {"basis": cck, "ccolor": ccolor, "parts": parts,
                              "center": numpy.mean(lng_lat[list(clusters_sets[cck]), :], axis=0),
                              "size": len(clusters_sets[cck]),
                              "label": chr(ord("A")+int(25*ccolor)) + chr(ord("a")+int(25*(25*ccolor)%25)),
                              }
In [23]:
if DO_CLUSTERS:
    
    method_family = [nkmeans]+ctypes
    nb_rows = len(NBCS)
    nb_cols = len(method_family)
    fig, axes = plt.subplots(nb_rows, nb_cols, figsize=(FSIZE*nb_cols, FSIZE*nb_rows))
    for xi, nm in enumerate(method_family):
        for yi, nbc in enumerate(NBCS):
            clustering_info = {}
            plot_clusters(axes, xi, yi, lng_lat, nm, nbc, map_cc_cv[(nm, nbc)], clusters_sets, clusters_info, clusterings_info[(nm, nbc)], cmC)
    mk_plot("clustersKRMaps")
/usr/lib/python3/dist-packages/mpl_toolkits/basemap/__init__.py:1704: MatplotlibDeprecationWarning: The axesPatch function was deprecated in version 2.1. Use Axes.patch instead.
  limb = ax.axesPatch
/usr/lib/python3/dist-packages/mpl_toolkits/basemap/__init__.py:1707: MatplotlibDeprecationWarning: The axesPatch function was deprecated in version 2.1. Use Axes.patch instead.
  if limb is not ax.axesPatch:
In [24]:
if DO_CLUSTERS:
    
    method_family = [l[0] for l in linkage_args]
    nb_rows = len(NBCS)+1
    nb_cols = len(method_family)
    fig, axes = plt.subplots(nb_rows, nb_cols, figsize=(FSIZE*nb_cols, FSIZE*nb_rows))
    for xi, nm in enumerate(method_family):
        scipy.cluster.hierarchy.dendrogram(linkZs[nm], p=3, orientation="left", truncate_mode="level", link_color_func=lambda k: "black", no_labels=True, ax=axes[-1,xi])
        for yi, nbc in enumerate(NBCS):
            clustering_info = {}
            plot_clusters(axes, xi, yi, lng_lat, nm, nbc, map_cc_cv[(nm, nbc)], clusters_sets, clusters_info, clusterings_info[(nm, nbc)], cmC)
    mk_plot("clustersHMaps")
/usr/lib/python3/dist-packages/mpl_toolkits/basemap/__init__.py:1704: MatplotlibDeprecationWarning: The axesPatch function was deprecated in version 2.1. Use Axes.patch instead.
  limb = ax.axesPatch
/usr/lib/python3/dist-packages/mpl_toolkits/basemap/__init__.py:1707: MatplotlibDeprecationWarning: The axesPatch function was deprecated in version 2.1. Use Axes.patch instead.
  if limb is not ax.axesPatch: